To increase the resolution of the scRNAseq, we utilized the dataset from Tasic et al, with over 43,000 genes and 23,000 cells.

liger_seqfish = createLiger(list(tasic = tasic, seqfish = seqfish))
liger_seqfish %<>% normalize() %>% selectGenes() %>% scaleNotCenter()
liger_seqfish %<>% optimizeALS(k=20) %>% quantile_norm() %>% louvainCluster()
liger_seqfish %<>% runTSNE()
names(liger_seqfish@clusters)=rownames(liger_seqfish@tsne.coords)
seqfish_purity = calcPurity(liger_seqfish, seq_clust)
tasic_purity = calcPurity(liger_seqfish, tasic_clust)
seqfish_ari = calcARI(liger_seqfish, seq_clust)
tasic_ari = calcARI(liger_seqfish, tasic_clust)
alignment = calcAlignment(liger_seqfish)
seqfish_stats = c(seqfish_purity, seqfish_ari, tasic_purity, tasic_ari, alignment)
by_dataset = plotByDatasetAndCluster(liger_seqfish, return.plots = T)[[1]]
by_liger_cluster = plotByDatasetAndCluster(liger_seqfish, return.plots = T)[[2]]
by_tasic_cluster = plotByDatasetAndCluster(liger_seqfish, clusters = tasic_clust, return.plots = T)[[2]]
by_seqfish_cluster = plotByDatasetAndCluster(liger_seqfish, clusters = seq_clust, return.plots = T)[[2]]
seqfish_stats
## [1] 0.63431434 0.01600115 0.47769437 0.16224313 0.94766249
by_dataset

by_liger_cluster

by_tasic_cluster
## Warning: Removed 1 rows containing missing values (geom_text).

by_seqfish_cluster
## Warning: Removed 1 rows containing missing values (geom_text).

To directly compare the performance on the given seqfish data to a baseline dataset, the 160 gene STARmap dataset utilized in Wang and Allen et. al was downsampled to 120 genes and integrated with the scRNAseq dataset.

starmap_downsampled = starmap_160[sample(1:160, 120, replace = FALSE),]
liger_120 = createLiger(list(tasic = tasic, starmap = starmap_downsampled))
liger_120 %<>% normalize() %>% selectGenes() %>% scaleNotCenter()
liger_120 %<>% optimizeALS(k=20) %>% quantile_norm() %>% louvainCluster()
liger_120 %<>% runTSNE()
names(liger_120@clusters)=rownames(liger_120@tsne.coords)
starmap_purity = calcPurity(liger_120, starmap_160_clust)
tasic_purity = calcPurity(liger_120, tasic_clust)
starmap_ari = calcARI(liger_120, starmap_160_clust)
tasic_ari = calcARI(liger_120, tasic_clust)
alignment = calcAlignment(liger_120)
starmap_stats = c(seqfish_purity, seqfish_ari, tasic_purity, tasic_ari, alignment)
by_dataset = plotByDatasetAndCluster(liger_120, return.plots = T)[[1]]
by_liger_cluster = plotByDatasetAndCluster(liger_120, return.plots = T)[[2]]
by_tasic_cluster = plotByDatasetAndCluster(liger_120, clusters = tasic_clust, return.plots = T)[[2]]
by_starmap_cluster = plotByDatasetAndCluster(liger_120, clusters = starmap_clust, return.plots = T)[[2]]
starmap_stats
## [1] 0.63431434 0.01600115 0.75381828 0.44315609 0.81398465
by_dataset

by_liger_cluster

by_tasic_cluster

by_starmap_cluster

To determine the impact of the number of genes in spatial data, a dataset with a larger number of genes was utilized. The 1020 gene STARmap dataset utilized in Wang and Allen et. al was repeatedly downsampled and integrated with the much larger scRNAseq dataset.

starmap_downsampled = starmap_1020[sample(1:1020, 700, replace = FALSE),]
liger_700 = createLiger(list(tasic = tasic, starmap = starmap_downsampled))
liger_700 %<>% normalize() %>% selectGenes() %>% scaleNotCenter()
liger_700 %<>% optimizeALS(k=20) %>% quantile_norm() %>% louvainCluster()
liger_700 %<>% runTSNE()
names(liger_700@clusters)=rownames(liger_700@tsne.coords)
starmap_purity = calcPurity(liger_700, starmap_clust)
tasic_purity = calcPurity(liger_700, tasic_clust)
starmap_ari = calcARI(liger_700, starmap_clust)
tasic_ari = calcARI(liger_700, tasic_clust)
alignment = calcAlignment(liger_700)
by_dataset = plotByDatasetAndCluster(liger_700, return.plots = T)[[1]]
by_liger_cluster = plotByDatasetAndCluster(liger_700, return.plots = T)[[2]]
by_tasic_cluster = plotByDatasetAndCluster(liger_700, clusters = tasic_clust, return.plots = T)[[2]]
by_starmap_cluster = plotByDatasetAndCluster(liger_700, clusters = starmap_clust, return.plots = T)[[2]]
c(starmap_purity, starmap_ari, tasic_purity, tasic_ari, alignment)
## [1] 0.5889003 0.3152321 0.8867461 0.6315865 0.8885681
by_dataset

by_liger_cluster

by_tasic_cluster

by_starmap_cluster

To account for the randomness associated with downsampling genes and the stochastic nature of our method, we repeated these analyses several times and took an average across runs.

stats_starmap = matrix(ncol = 6)
colnames(stats_starmap) = c("genes","purity_spatial","purity_rnaseq","ari_spatial","ari_rnaseq","alignment")
stats_seqfish = stats_starmap
for(n in 1:10){
  for(i in seq(900, 100, -100)){
      starmap_downsampled = starmap_1020[sample(1:1020, i, replace = FALSE),]
      liger_down = createLiger(list(tasic = tasic, starmap = starmap_downsampled))
      liger_down %<>% normalize() %>% selectGenes() %>% scaleNotCenter()
      liger_down %<>% optimizeALS(k=20) %>% quantile_norm() %>% louvainCluster()
      liger_down %<>% runTSNE()
      names(liger_down@clusters)=rownames(liger_down@tsne.coords)
      starmap_purity = calcPurity(liger_down, starmap_clust)
      tasic_purity = calcPurity(liger_down, tasic_clust)
      starmap_ari = calcARI(liger_down, starmap_clust)
      tasic_ari = calcARI(liger_down, tasic_clust)
      alignment = calcAlignment(liger_down)
      stats_starmap = rbind(stats, c(i, starmap_purity, tasic_purity, starmap_ari,  tasic_ari,alignment))
  }
  liger_seqfish = createLiger(list(tasic = tasic, seqfish = seqfish))
  liger_seqfish %<>% normalize() %>% selectGenes() %>% scaleNotCenter()
  liger_seqfish %<>% optimizeALS(k=20) %>% quantile_norm() %>% louvainCluster()
  liger_seqfish %<>% runTSNE()
  names(liger_seqfish@clusters)=rownames(liger_seqfish@tsne.coords)
  starmap_purity = calcPurity(liger_seqfish, seq_clust)
  tasic_purity = calcPurity(liger_seqfish, tasic_clust)
  starmap_ari = calcARI(liger_seqfish, seq_clust)
  tasic_ari = calcARI(liger_seqfish, tasic_clust)
  alignment = calcAlignment(liger_seqfish)
  stats_seqfish = rbind(stats, c(120, starmap_purity, tasic_purity, starmap_ari,  tasic_ari,alignment))
}
stats_starmap = stats_starmap[2:nrow(stats_starmap),]
stats_seqfish = stats_seqfish[2:nrow(stats_seqfish),]
stats_starmap = stats[order(stats_starmap[,1]),]
Downsampling summary

Downsampling summary